#### MASTER FILE
## From Unemployment to Self-employment: An Evaluation of Self-employment Assistance Programs
## by Alexandre Gaillard & Sumudu Kankanmage 
## June 2023.

# LIBRARY #
library(foreign)
library(readstata13)
library(tidyr)
library(data.table)
library(questionr)
library(matrixStats)
library(graphics)
library(weights)
library(DescTools)
library(GiniWegNeg)
library(ggplot2)
library(xtable)

# Lorenz curve:
lorenz <- function(data, data2) {
  cumulative = c()
  totalwealth = c()
  cumulativewealth = c()
  
  size=length(data)
  totalcumul = 0
  for(i in 1:size) {totalcumul = totalcumul + data[i] }
  
  cumulativewealth[1] = data2[1]*(data[1]/totalcumul)
  cumulative[1] = data[1]/totalcumul
  
  for(i in 1:size) { totalwealth[i] = data2[i]*(data[i]/totalcumul)}
  
  totalw = sum(totalwealth)
  for(i in 2:size) { cumulative[i] = cumulative[i-1] + (data[i]/totalcumul)}
  for(i in 2:size) {cumulativewealth[i] = cumulativewealth[i-1] + data2[i]*(data[i]/totalcumul)}
  
  cumulativew = cumulativewealth/totalw
  lorenzpoint = cbind(cumulative, cumulativew)
}

giniindex <- function(data1,data2) {
  aera = c()
  size = length(data1)
  aera[1] = 0
  for(i in 2:size) { aera[i] = aera[i - 1] + (data2[i] + data2[i-1])*(data1[i] - data1[i-1]) }
  giniindex3 = 1 - aera[length(aera)]
}


#table top percent with interpolation
tabletop2 <- function(data1,data2,level) {
  gap = data2[data1>=level] - level
  top1val = data2[which.max(data1>=level)]*gap + (1 - gap)*data2[which.max(data1>=level) - 1]
  top1 = 1- top1val[1]
}

#table top percent
tabletop <- function(data1,data2,level) {
  top1val = data2[data1>=level]
  top1 = 1- top1val[1]
}

weighted.var2 <- function(x, w, na.rm = TRUE) {
  if (na.rm) {
    w <- w[i <- !is.na(x)]
    x <- x[i]
  }
  sum.w <- sum(w)
  (sum(w*x^2) * sum.w - sum(w*x)^2) / (sum.w^2 - sum(w^2))
}


## compute all the SCF statistics.
setwd("/Users/alexandregaillard/Dropbox (Personal)/JOLE_revision/Data/SCF/data")

SCF_files_1 = c("p95i6.dta","p98i6.dta","p01i6.dta","p04i6.dta","p07i6.dta","p10i6.dta","p13i6.dta")
SCF_files_2 = c("rscfp1995.dta","rscfp1998.dta","rscfp2001.dta","rscfp2004.dta","rscfp2007.dta","rscfp2010.dta","rscfp2013.dta")



frac_SE = c()
frac_EE = c()
frac_WW = c()
frac_UU = c()
frac_SE_emp = c()
frac_EE_emp = c()
frac_SE_emp_lskill = c()
frac_SE_emp_hskill = c()
frac_zero  = c()
frac_10_median = c()
median_SE_WW   = c()
median_SE_ALL  = c()
median_SE_WW_OCC_INC    = c()
mean_SE_WW_OCC_IN       = c()
median_SE_bus_to_wealth = c()
mean_SE_bus_to_wealth   = c()
gini_wealth = c()
sd_ratio_log_SE_WW_OCC_INC = c()
sd_ratio_SE_WW_OCC_INC     = c()
sd_ratio_SE_WW_OCC_INC     = c()
mean_SE_WW_INCOME        = c()
mean_SE_WW_OCC_INC       = c()
mean_SE_OCC_INC_wealth   = c()
median_SE_DEBT_OCC_INC   = c()
mean_SE_DEBT_OCC_INC   = c()
median_SE_OCC_INC_wealth = c()
median_WW_INCOME_wealth  = c()
median_SE_WW_OCC_INC     = c()
median_SE_WW_INCOME      = c()
median_SE_INCOME_wealth  = c()
K_ent_frac = c()
gini_SE_occ_inc = c()
gini_WW_occ_inc = c()
neg_inc_SE   = c()
neg_inc_SE_2 = c()
neg_inc_SE_3 = c()
norm_inc_2.5 = c()
norm_inc_2.0 = c()
norm_inc_1.5 = c()
norm_inc_1.0 = c()
norm_inc_0.5 = c()
norm_inc_0.3 = c()
norm_inc_0.1 = c()
mean_SE_DEBT_OCC_INC = c()
frac_BO = c()
sd_ratio_SE_WW_INC = c()
frac_BO_emp = c()



i = 7
for(i in 1:length(SCF_files_1)){
  
  data2 = read.dta13(SCF_files_1[i])
  data1 = read.dta13(SCF_files_2[i])
  colnames(data2) <- toupper(colnames(data2))
  colnames(data1) <- toupper(colnames(data1))
  SCF_all = merge(data2,data1,by="YY1")
  
  ## occupational rate:
  ## definition of a self-employed individual is someone who work in his business + declare being self-employed.
  # SE    = SCF_all[which(SCF_all$X4106 %in% c(2,3) & SCF_all$X3111 >= 1 & SCF_all$AGE %in% c(20:65)),]
  # SE    = SCF_all[which(SCF_all$X4106 %in% c(2,3,4) & SCF_all$X3104 == 1 & SCF_all$AGE %in% c(20:65)),]
  SE    = SCF_all[which(SCF_all$X4106 %in% c(2,3,4) & SCF_all$X3104 == 1 & SCF_all$AGE %in% c(20:65)),]
  WW    = SCF_all[which(SCF_all$X4106 %in% c(1) & SCF_all$AGE %in% c(20:65)),]
  BO    = SCF_all[which(SCF_all$X3103 ==1 | SCF_all$X3401 == 1),]
  BO$nb_bus = BO$X3105 + BO$X3402
  table(BO$nb_bus)
  wtd.table(x = BO$nb_bus, weights = BO$WGT)$sum.of.weights/sum(BO$WGT)*100
  sum(BO$nb_bus*BO$WGT)/sum(BO$WGT)
  #WW_2  = SCF_all[which(SCF_all$X4106 %in% c(2,3) & SCF_all$X3111 <= 0 & SCF_all$AGE %in% c(20:65)),]
  #WW    = rbind(WW,WW_2)
  WW_2  = SCF_all[which(SCF_all$X4106 %in% c(2,3,4) & SCF_all$X3104 != 1 & SCF_all$AGE %in% c(20:65)),]
  WW    = rbind(WW,WW_2)
  UU    = SCF_all[which(SCF_all$X6670 %in% c(3) & SCF_all$AGE %in% c(20:65)),]
  
  OTHER = SCF_all[which(SCF_all$OCCAT1 == 4 & SCF_all$AGE %in% c(20:65)),]
  ENT   = SE[which(SE$BUS > 0 | SE$ACTBUS > 0),] # ACTIVELY MANAGING AND WITH A BUSINESS VAlUE # or ENT = SE[which(SE$kgbus > 0),] 
  ENT   = SE[which(SE$X3104 == 1),] # ACTIVELY MANAGING AND WITH A BUSINESS VAlUE # or ENT = SE[which(SE$kgbus > 0),] 
  ENT$ACTBUS
  ALL   = rbind(UU,WW,SE)
  
  # self-rate
  frac_BO[i] = sum(BO$WGT,na.rm=T)/sum(ALL$WGT,na.rm=T)
  frac_SE[i] = sum(SE$WGT,na.rm=T)/sum(ALL$WGT,na.rm=T)
  frac_EE[i] = sum(ENT$WGT,na.rm=T)/sum(ALL$WGT,na.rm=T) 
  frac_WW[i] = sum(WW$WGT,na.rm=T)/sum(ALL$WGT,na.rm=T)
  frac_UU[i] = sum(UU$WGT,na.rm=T)/sum(ALL$WGT,na.rm=T)
  
  ## self-employed employer
  frac_BO_emp[i] = sum(BO$WGT[which(BO$X3111 >= 2)],na.rm=T)/sum(BO$WGT[which(BO$X3111 >= 0)],na.rm=T)
  frac_SE_emp[i] = sum(SE$WGT[which(SE$X3111 >= 2)],na.rm=T)/sum(SE$WGT[which(SE$X3111 >= 0)],na.rm=T)
  frac_EE_emp[i] = sum(ENT$WGT[which(ENT$X3111 >= 2)],na.rm=T)/sum(ENT$WGT[which(ENT$X3111 >= 0)],na.rm=T)
  
  ## by ability group.
  SE_low_skill = SE[which(SE$X5904 != 1),]
  SE_hig_skill = SE[which(SE$X5904 == 1),]
  frac_SE_emp_lskill[i] = sum(SE_low_skill$WGT[which(SE_low_skill$X3111 >= 2)],na.rm=T)/sum(SE_low_skill$WGT,na.rm=T) 
  frac_SE_emp_hskill[i] = sum(SE_hig_skill$WGT[which(SE_hig_skill$X3111 >= 2)],na.rm=T)/sum(SE_hig_skill$WGT,na.rm=T) 
  
  ## share of zero + negative INCOME.
  ALLzero = ALL[ALL$NETWORTH <= 0,]
  frac_zero[i] = sum(ALLzero$WGT)/sum(ALL$WGT)
  # with very low net worth #
  negworth = ALL[which(ALL$NETWORTH <= weightedMedian(ALL$NETWORTH, ALL$WGT)/10),]
  frac_10_median[i] = sum(negworth$WGT)/sum(ALL$WGT) # 10.255% of zero net worth
  
  
  # wealth
  median_SE_WW[i]  = weightedMedian(SE$NETWORTH, SE$WGT)/weightedMedian(WW$NETWORTH, WW$WGT) 
  median_SE_ALL[i] = weightedMedian(SE$NETWORTH, SE$WGT)/weightedMedian(ALL$NETWORTH, ALL$WGT) 
  
  ## median INCOME
  median_SE_bus_to_wealth[i]  = weightedMedian(SE$BUS, SE$WGT)/weightedMedian(SE$NETWORTH, SE$WGT)
  mean_SE_bus_to_wealth[i]    = weightedMean(SE$BUS, SE$WGT)/weightedMean(SE$NETWORTH, SE$WGT)
  
  ## Gini wealth
  gini_wealth[i]  = Gini_RSV(ALL$NETWORTH, w = ALL$WGT)$GINI_RSV
  
  # Compare with model #
  WW$WAGEINC_pos  = ifelse(WW$WAGEINC <= 0.0, 1, WW$WAGEINC)
  SE$earning_pos  = ifelse(SE$BUSSEFARMINC + SE$WAGEINC <= 0.0, 1, SE$BUSSEFARMINC + SE$WAGEINC)
  
  sd_ratio_log_SE_WW_OCC_INC[i] = sqrt(weighted.var2(log(SE$earning_pos), SE$WGT))/sqrt(weighted.var2(log(WW$WAGEINC_pos), WW$WGT))
  sd_ratio_SE_WW_OCC_INC[i]     = sqrt(weighted.var2(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT))/sqrt(weighted.var2(WW$WAGEINC, WW$WGT))
  sd_ratio_SE_WW_INC[i]         = sqrt(weighted.var2(SE$INCOME, SE$WGT))/sqrt(weighted.var2(WW$INCOME, WW$WGT))
  
  mean_SE_WW_INCOME[i]        = weightedMean(SE$INCOME, SE$WGT)/weightedMean(WW$INCOME, WW$WGT)
  mean_SE_WW_OCC_INC[i]       = weightedMean(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT)/weightedMean(WW$WAGEINC, WW$WGT)
  mean_SE_OCC_INC_wealth[i]   = weightedMean(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT)/weightedMean(SE$NETWORTH, SE$WGT)
  
  median_SE_DEBT_OCC_INC[i]   = weightedMedian(SE$DEBT, SE$WGT)/weightedMedian(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT)
  mean_SE_DEBT_OCC_INC[i]     = weightedMean(SE$DEBT, SE$WGT)/weightedMean(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT)
  median_SE_OCC_INC_wealth[i] = weightedMedian(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT)/weightedMedian(SE$NETWORTH, SE$WGT)
  median_WW_INCOME_wealth[i]  = weightedMedian(WW$INCOME, WW$WGT)/weightedMedian(WW$NETWORTH, WW$WGT)
  median_SE_INCOME_wealth[i]  = weightedMedian(SE$INCOME, SE$WGT)/weightedMedian(SE$NETWORTH, SE$WGT)
  median_SE_WW_OCC_INC[i]     = weightedMedian(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT)/weightedMedian(WW$WAGEINC, WW$WGT)
  median_SE_WW_INCOME[i]      = weightedMedian(SE$INCOME, SE$WGT)/weightedMedian(WW$INCOME, WW$WGT)
  
  
  ## fraction of capital of self-employed agents.
  entK <- sum(SE$NETWORTH*SE$WGT)
  popK <- sum(ALL$NETWORTH*ALL$WGT)
  K_ent_frac[i] = entK/popK
  
  
  #sum(ENT$BUSSEFARMINC*(ENT$WGT/sum(ENT$WGT)))/(sum(WW$WAGEINC*(WW$WGT/sum(WW$WGT)))+sum(ENT$BUSSEFARMINC*(ENT$WGT/sum(ENT$WGT))))
  gini_SE_occ_inc[i] = Gini_RSV(SE$BUSSEFARMINC + SE$WAGEINC, SE$WGT)$GINI_RSV
  gini_WW_occ_inc[i] = Gini_RSV(WW$WAGEINC, WW$WGT)$GINI_RSV
  ENTpos = SE[which(SE$BUSSEFARMINC + SE$WAGEINC > 100.0),]
  ENTall = SE[which(SE$BUSSEFARMINC + SE$WAGEINC > 100.0 | SE$BUSSEFARMINC + SE$WAGEINC <= -11),]
  ENTneg = SE[which(SE$BUSSEFARMINC + SE$WAGEINC <= 0),]
  neg_inc_SE[i] = sum(as.numeric(ENTneg$WGT))/sum(as.numeric(ENTall$WGT)) # 3.23% have negative or 0 entrepreneurial's INCOME (CAUTION: this take into account family's INCOME)
  ENTneg = SE[which(SE$INCOME <= 0),]
  neg_inc_SE_2[i] = sum(as.numeric(ENTneg$WGT))/sum(as.numeric(ENT$WGT)) # 3.23% have negative or 0 entrepreneurial's INCOME (CAUTION: this take into account family's INCOME)
  ENTneg = SE[which(SE$BUSSEFARMINC < 0),]
  neg_inc_SE_3[i] = sum(as.numeric(ENTneg$WGT))/sum(as.numeric(ENTall$WGT)) # 3.23% have negative or 0 entrepreneurial's INCOME (CAUTION: this take into account family's INCOME)
  
  ## normalized income.
  SE$normINC = (SE$INCOME)/weightedMedian(SE$INCOME, as.numeric(SE$WGT))
  ENTincless2 = SE[which(SE$normINC < 2.5),]
  norm_inc_2.5[i] = sum(ENTincless2$WGT)/sum(SE$WGT)
  ENTincless2 = SE[which(SE$normINC < 2.0),]
  norm_inc_2.0[i] = sum(ENTincless2$WGT)/sum(SE$WGT)
  ENTincless2 = SE[which(SE$normINC < 1.5),]
  norm_inc_1.5[i] = sum(ENTincless2$WGT)/sum(SE$WGT)
  ENTincless2 = SE[which(SE$normINC < 1.0),]
  norm_inc_1.0[i] = sum(ENTincless2$WGT)/sum(SE$WGT)
  ENTincless2 = SE[which(SE$normINC < 0.5),]
  norm_inc_0.5[i] = sum(ENTincless2$WGT)/sum(SE$WGT)
  ENTincless2 = SE[which(SE$normINC < 0.3),]
  norm_inc_0.3[i] = sum(ENTincless2$WGT)/sum(SE$WGT)
  ENTincless2 = SE[which(SE$normINC < 0.1),]
  norm_inc_0.1[i] = sum(ENTincless2$WGT)/sum(SE$WGT)
  
  print(i)
}


list_stats_main_FINAL = list(frac_SE*100,frac_UU*100,median_SE_WW,median_SE_ALL,median_SE_WW_INCOME,frac_10_median*100,
                       frac_zero*100,gini_wealth,gini_SE_occ_inc,K_ent_frac*100,mean_SE_DEBT_OCC_INC,median_SE_INCOME_wealth,
                       median_WW_INCOME_wealth,neg_inc_SE*100,neg_inc_SE_3*100,sd_ratio_SE_WW_OCC_INC,frac_SE_emp*100,
                       frac_SE_emp_lskill*100,frac_SE_emp_hskill*100)

## calculate the mean
means_main_FINAL <- sapply(list_stats_main_FINAL, mean, na.rm = TRUE)

## calculate standard deviations for each element in the list
sds_main_FINAL <- sapply(list_stats_main_FINAL, sd, na.rm = TRUE)

## calculate the mean
minval_main_FINAL <- sapply(list_stats_main_FINAL, min, na.rm = TRUE)

## calculate standard deviations for each element in the list
maxval_main_FINAL <- sapply(list_stats_main_FINAL, max, na.rm = TRUE)


SCF_table_main_FINAL = round(cbind(means_main_FINAL,sds_main_FINAL,minval_main_FINAL,maxval_main_FINAL),3)
rownames(SCF_table_main_FINAL) <- c("Fraction of self-employed (%)",
                              "Fraction of unemployed (%)",
                              "Ratio of median net worth (self-employed to worker)",
                              "Ratio of median net worth (self-employed to all population)",
                              "Ratio of median income (self-employed to worker)",
                              "Fraction of pop. with net worth < 1/10 of median (%)",
                              "Fraction of pop. with zero net worth",
                              "Gini coefficient -- wealth",
                              "Gini coefficient -- self employed earnings",
                              "Fraction of capital held by self-employed (%)",
                              "Ratio of mean self-employed debt to earnings",
                              "Ratio of median self-employed income to net worth (%)",
                              "Ratio of median worker income to worker net worth (%)",
                              "% of self-employed with zero or negative income (%)",
                              "% of self-employed with negative profit (%)",
                              "Standard deviation of self-employed to worker earnings",
                              "Fraction of self-employed who are employer (%)",
                              "Fraction of employers among non-college self-employed (%)",
                              "Fraction of employers among college self-employed (%)")

xtable(SCF_table_main_FINAL,digits=2)

mean(norm_inc_2.5)
mean(norm_inc_2.0)
mean(norm_inc_1.5)
mean(norm_inc_1.0)
mean(norm_inc_0.5)
mean(norm_inc_0.3)
mean(norm_inc_0.1)



### STATISTICS -- MODEL
setwd("/Users/alexandregaillard/Dropbox (Personal)/JOLE_revision/Code/SEA_paper/")
model      = read.table("OUTPUT/distribution/distJ_POLICY=0_OPT_labor_demand=1_Teq=1_Weq=1_Req=1_rhostar=0.400000_pLTpar=0.500000.txt", header=TRUE)
model2     = read.table("OUTPUT/VF/JNEvalues.out", header=TRUE)
WWmodel    = read.table("OUTPUT/distribution/distWW.out", header=TRUE)
distwealth = read.table("OUTPUT/distribution/distfilewealth_POLICY=0_OPT_labor_demand=1_Teq=1_Weq=1_Req=1_rhostar=0.400000_pLTpar=0.500000.txt", header=TRUE)

frac_SE_model     = sum(distwealth$J)
frac_UU_model     = sum(distwealth$Ui+distwealth$Un)
frac_SE_emp_model = 0.66
frac_SE_emp_lskill_model = 0.58
frac_SE_emp_hskill_model = 0.77
frac_zero_model      = 0.045

sum(distwealth$J*distwealth$ASSET)/sum(distwealth$all*distwealth$ASSET)


# determine how much individuals is below 1/100 median wealth #
median_wealth = weightedMedian(distwealth$ASSET, distwealth$all)
distwealthzero = distwealth[distwealth$ASSET <= median_wealth/10,]
frac_10_median_model = sum(distwealthzero$all)
WWmodel$earnings = WWmodel$INCOME 
WWmodel$distWW   = WWmodel$distWWE + WWmodel$distWWNE
weighted.var2 <- function(x, w, na.rm = TRUE) {
  if (na.rm) {
    w <- w[i <- !is.na(x)]
    x <- x[i]
  }
  sum.w <- sum(w)
  (sum(w*x^2) * sum.w - sum(w*x)^2) / (sum.w^2 - sum(w^2))
}
# lower than in the data.
sd_ratio_SE_WW_OCC_INC_model = sqrt(weighted.var2(model$incJNE,model$distJNE))/sqrt(weighted.var2(WWmodel$INCOME,WWmodel$distWWNE)) # std dev of income is 2.5 larger.
grid = 150

JNEneg = model[which(model$busincJNE < 0.0),]
JEneg  = model[which(model$busincJE < 0.0),]
neg    = sum(JNEneg$distJNE) + sum(JEneg$distJE)
neg2   = sum(JNEneg$distJNEstay) + sum(JEneg$distJEstay)
totalENT = sum(model$distJNE) + sum(model$distJE)
neg_inc_SE_model = neg/totalENT 
neg_inc_SE_stay_model = neg2/totalENT# percentage with negative income


JNEneg = model[which(model$incJNE <= 0.0),]
JEneg = model[which(model$incJE <= 0.0),]
neg = sum(JNEneg$distJNE) + sum(JEneg$distJE)
neg2 = sum(JNEneg$distJNEstay) + sum(JEneg$distJEstay)
totalENT = sum(model$distJNE) + sum(model$distJE)
neg_inc_SE_2_model = neg2/totalENT

JNE_tot = model
JNE_tot[1:(grid*7*7*3),] = model
JNE_tot[(grid*7*7*3+1):((grid*7*7*3)*2),] = model

JNE_tot$distE[1:(grid*7*7*3)] = JNE_tot$distJE[1:(grid*7*7*3)]
JNE_tot$distE[(grid*7*7*3+1):((grid*7*7*3)*2)] = JNE_tot$distJNE[(grid*7*7*3+1):((grid*7*7*3)*2)]

JNE_tot$distEstay[1:(grid*7*7*3)] = JNE_tot$distJEstay[1:(grid*7*7*3)]
JNE_tot$distEstay[(grid*7*7*3+1):((grid*7*7*3)*2)] = JNE_tot$distJNEstay[(grid*7*7*3+1):((grid*7*7*3)*2)]

JNE_tot$incE[1:(grid*7*7*3)] = JNE_tot$incJE[1:(grid*7*7*3)]
JNE_tot$incE[(grid*7*7*3+1):((grid*7*7*3)*2)] = JNE_tot$incJNE[(grid*7*7*3+1):((grid*7*7*3)*2)]

JNE_tot$busincE[1:(grid*7*7*3)] = JNE_tot$busincJE[1:(grid*7*7*3)]
JNE_tot$busincE[(grid*7*7*3+1):((grid*7*7*3)*2)] = JNE_tot$busincJNE[(grid*7*7*3+1):((grid*7*7*3)*2)]

JNE_tot_neg = JNE_tot[which(JNE_tot$incE <= 0.0),]
sum(JNE_tot_neg$distE)/sum(JNE_tot$distE)

JNE_tot$earning_pos = ifelse(JNE_tot$busincE <= 0.0, 1, JNE_tot$busincE)
WWmodel$wageinc_pos = ifelse(WWmodel$earnings <= 0.0, 1, WWmodel$earnings)
sd_ratio_log_SE_WW_OCC_INC_model = sqrt(weighted.var2(log(JNE_tot$earning_pos), JNE_tot$distE))/sqrt(weighted.var2(log(WWmodel$wageinc_pos), WWmodel$distWW))
sd_ratio_SE_WW_OCC_INC_model = sqrt(weighted.var2(JNE_tot$earning_pos, JNE_tot$distE))/sqrt(weighted.var2(WWmodel$wageinc_pos, WWmodel$distWW))

# GINI ENTREPRENEUR'S EARNING #
distinc = c()
distinc = model$distJNE[1:nrow(model)]
distinc[(nrow(model)+1):(2*nrow(model))] = model$distJE[1:nrow(model)]
inc = c()
inc = model$busincJNE[1:nrow(model)]
inc[(nrow(model)+1):(2*nrow(model))] = model$busincJE[1:nrow(model)]
incomeJ = as.data.frame(cbind(distinc, inc))
incomeJ = incomeJ[order(incomeJ$inc),]
gini_SE_occ_inc_model = Gini_RSV(incomeJ[,2],incomeJ[,1])$GINI_RSV
gini_wealth_model = Gini_RSV(distwealth$ASSET,distwealth$all)$GINI_RSV

modelinc = incomeJ[incomeJ[,2] > 0.0,]
x = Lc(modelinc[,2], n = modelinc[,1])
weightedMean(incomeJ[,2], incomeJ[,1])/0.31

# assign AX value to DistJE #
# use of an ID
model$IDmerge = paste0(model$THETA*10+(model$ZETA+1)*1000+model$ASSET)
model2$IDmerge = paste0(model2$THETA*10+(model2$Z+1)*1000+model2$ASSET)
entmodel = merge(model, model2, by="IDmerge")
weightedMedian(entmodel$collateralJNE, entmodel$distJNE)/weightedMedian(entmodel$ASSET.x, entmodel$distJNE)
weightedMean(entmodel$collateralJNE, entmodel$distJNE)/weightedMedian(entmodel$ASSET.x, entmodel$distJNE)
entmodel = entmodel[which(entmodel$ASSET.x > 0),]
weightedMedian(entmodel$sloanJNE/entmodel$ASSET.x, entmodel$distJNE)
entmodel_borrow = entmodel[which(entmodel$sloanJNE > 0),]
sum(entmodel_borrow$distJNE)/totalENT # fraction that borrow.
weightedMedian((entmodel$sloanJNE), entmodel$distJNE)/weightedMedian((entmodel$incJNE*4), entmodel$distJNE) # fraction of median debt to income
median_SE_bus_to_wealth_model = weightedMedian((entmodel$incJNE*4), entmodel$distJNE)/weightedMedian((entmodel$ASSET.x), entmodel$distJNE) # fraction of median income to net worth
mean_SE_bus_to_wealth_model =  weightedMean((entmodel$incJNE*4), entmodel$distJNE)/weightedMean((entmodel$ASSET.x), entmodel$distJNE)
                                                                                                
median_SE_WW_model = weightedMedian(JNE_tot$ASSET, JNE_tot$distE)/weightedMedian(WWmodel$ASSET, (WWmodel$distWWE + WWmodel$distWWNE)) # RATIO OF E/pop
median_SE_ALL_model = weightedMedian(JNE_tot$ASSET, JNE_tot$distE)/weightedMedian(distwealth$ASSET, distwealth$all) # RATIO OF E/pop

median_SE_WW_INCOME_model = weightedMedian(model$incJNE, model$distJNE)/weightedMedian(WWmodel$INCOME, (WWmodel$distWWNE+WWmodel$distWWE))
mean_SE_WW_INCOME_model = weightedMean(model$incJNE, model$distJNE)/weightedMean(WWmodel$INCOME, (WWmodel$distWWNE+WWmodel$distWWE))

weightedMedian((entmodel$collateralJNE), entmodel$distJNE)/weightedMedian((entmodel$ASSET.x), entmodel$distJNE) # fraction of median debt to income


## compute the fraction that would have been better off in paid-employment ##
JNE_tot$ability[JNE_tot$THETA == 0] <- 0.393221996043480
JNE_tot$ability[JNE_tot$THETA == 1] <- 1
JNE_tot$ability[JNE_tot$THETA == 2] <- 2.54309273148958
JNE_tot$earningWW = JNE_tot$ability*0.253706723228092*(1-0.009027144467390)
fracNECESSITY = sum(JNE_tot$distE[JNE_tot$busincE < JNE_tot$earningWW])/0.107
fracNECESSITY


median_SE_WW_INCOME_model = weightedMedian(JNE_tot$incE, JNE_tot$distE)/weightedMedian(WWmodel$INCOME, (WWmodel$distWWNE+WWmodel$distWWE))
mean_SE_WW_INCOME_model   = weightedMean(JNE_tot$incE, JNE_tot$distE)/weightedMean(WWmodel$INCOME, (WWmodel$distWWNE+WWmodel$distWWE))

median_SE_WW_OCC_INC_model = weightedMedian(JNE_tot$busincE, JNE_tot$distE)/weightedMedian(WWmodel$earnings, (WWmodel$distWW))
mean_SE_WW_OCC_INC_model   = weightedMean(JNE_tot$busincE, JNE_tot$distE)/weightedMean(WWmodel$earnings, (WWmodel$distWW))

mean_SE_DEBT_OCC_INC_model = weightedMean(JNE_tot$sloanJNE, JNE_tot$distJNE)/weightedMean((JNE_tot$busincJNE*4), JNE_tot$distJNE) # fraction of median debt to income
mean_SE_OCC_INC_wealth_model = weightedMean((JNE_tot$busincE*4), JNE_tot$distE)/weightedMean((JNE_tot$ASSET), JNE_tot$distE) # fraction of median debt to income
median_SE_OCC_INC_wealth_model = weightedMedian((JNE_tot$busincE*4), JNE_tot$distE)/weightedMedian((JNE_tot$ASSET), JNE_tot$distE) # fraction of median debt to income

median_WW_INCOME_wealth_model = weightedMedian((WWmodel$INCOME*4), (WWmodel$distWW))/weightedMedian((WWmodel$ASSET), WWmodel$distWW) # fraction of median debt to income
median_SE_INCOME_wealth_model = weightedMedian((JNE_tot$incE*4), JNE_tot$distE)/weightedMedian((JNE_tot$ASSET), JNE_tot$distE) # fraction of median debt to income


JNE_tot$normincE = JNE_tot$incE/weightedMedian(JNE_tot$incE, as.numeric(JNE_tot$distE))
modelless2JNE = JNE_tot[which(JNE_tot$normincE < 2.5),]
norm_inc_2.5_model = sum(modelless2JNE$distE)/totalENT
modelless2JNE = JNE_tot[which(JNE_tot$normincE < 2.0),]
norm_inc_2.0_model = sum(modelless2JNE$distE)/totalENT
modelless2JNE = JNE_tot[which(JNE_tot$normincE < 1.5),]
norm_inc_1.5_model = sum(modelless2JNE$distE)/totalENT
modelless2JNE = JNE_tot[which(JNE_tot$normincE < 1.0),]
norm_inc_1.0_model = sum(modelless2JNE$distE)/totalENT
modelless2JNE = JNE_tot[which(JNE_tot$normincE < 0.5),]
norm_inc_0.5_model = sum(modelless2JNE$distE)/totalENT
modelless2JNE = JNE_tot[which(JNE_tot$normincE < 0.3),]
norm_inc_0.3_model = sum(modelless2JNE$distE)/totalENT
modelless2JNE = JNE_tot[which(JNE_tot$normincE < 0.1),]
norm_inc_0.1_model = sum(modelless2JNE$distE)/totalENT


## all the statistics.
list_stats_main_FINAL_model = list(frac_SE_model*100,frac_UU_model*100,median_SE_WW_model,median_SE_ALL_model,median_SE_WW_INCOME_model,frac_10_median_model*100,
                             frac_zero_model*100,gini_wealth_model,gini_SE_occ_inc_model,mean_SE_DEBT_OCC_INC_model,median_SE_INCOME_wealth_model,
                             median_WW_INCOME_wealth_model,neg_inc_SE_model*100,neg_inc_SE_2_model*100,sd_ratio_SE_WW_OCC_INC_model,frac_SE_emp_model*100,
                             frac_SE_emp_lskill_model*100,frac_SE_emp_hskill_model*100)

## calculate the mean
means_main_FINAL_model <- sapply(list_stats_main_FINAL_model, mean, na.rm = TRUE)

## calculate standard deviations for each element in the list
sds_main_FINAL_model <- sapply(list_stats_main_FINAL_model, sd, na.rm = TRUE)

## calculate the mean
minval_main_FINAL_model <- sapply(list_stats_main_FINAL_model, min, na.rm = TRUE)

## calculate standard deviations for each element in the list
maxval_main_FINAL_model <- sapply(list_stats_main_FINAL_model, max, na.rm = TRUE)


SCF_table_main_FINAL_model = round(cbind(means_main_FINAL_model,sds_main_FINAL_model,minval_main_FINAL_model,maxval_main_FINAL_model),3)
rownames(SCF_table_main_FINAL_model) <- c("Fraction of self-employed (%)",
                                    "Fraction of unemployed (%)",
                                    "Ratio of median net worth (self-employed to worker)",
                                    "Ratio of median net worth (self-employed to all population)",
                                    "Ratio of median income (self-employed to worker)",
                                    "Fraction of pop. with net worth < 1/10 of median (%)",
                                    "Fraction of pop. with zero net worth",
                                    "Gini coefficient -- wealth",
                                    "Gini coefficient -- self employed earnings",
                                    "Ratio of mean self-employed debt to earnings",
                                    "Ratio of median self-employed income to net worth (%)",
                                    "Ratio of median worker income to worker net worth (%)",
                                    "% of self-employed with zero or negative income (%)",
                                    "% of self-employed with negative profit (%)",
                                    "Standard deviation of self-employed to worker earnings",
                                    "Fraction of self-employed who are employer (%)",
                                    "Fraction of employers among non-college self-employed (%)",
                                    "Fraction of employers among college self-employed (%)")

xtable(SCF_table_main_FINAL_model,digits=2)

mean(norm_inc_2.5_model)
mean(norm_inc_2.0_model)
mean(norm_inc_1.5_model)
mean(norm_inc_1.0_model)
mean(norm_inc_0.5_model)
mean(norm_inc_0.3_model)
mean(norm_inc_0.1_model)

